//%%file SSort.cu

#include <iostream>
#include <cstdlib>
#include <cstring>
#include <stdlib.h> 
#include <cuda_runtime.h>
#include "cublas_v2.h"


/*
 * In-place ascending selection sort of data[left..right] (both bounds inclusive).
 *
 * data  : array of ints, read and written by the kernel.
 * left  : index of the first element of the range to sort.
 * right : index of the last element of the range to sort.
 *
 * The kernel never looks at threadIdx/blockIdx, so every launched thread runs
 * the whole sequential sort over the same range; the launch in this file uses
 * a single block with a single thread.
 */
__global__ void selection_sort( int *data, int left, int right )
{
  int pos = left;
  while( pos <= right ){
    // Start by assuming the element already at `pos` is the smallest.
    int bestIdx = pos;
    int bestVal = data[pos];

    // Scan the unsorted tail (pos, right] for something strictly smaller.
    // The strict comparison keeps the first occurrence among equal values.
    for( int scan = pos + 1 ; scan <= right ; ++scan ){
      const int candidate = data[scan];
      if( candidate < bestVal ){
        bestVal = candidate;
        bestIdx = scan;
      }
    }

    // Exchange only when the minimum is not already in place.
    if( bestIdx != pos ){
      data[bestIdx] = data[pos];
      data[pos] = bestVal;
    }

    ++pos;
  }
}


// Reports a failed CUDA runtime call on stderr and terminates the program.
// `what` names the step that failed so the message is actionable.
static void ssortCheck(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

/*
 * Sorts the host array `data` (n ints) in ascending order on the GPU.
 *
 * The array is copied to device memory, sorted by the selection_sort kernel
 * launched with one block of one thread, and copied back over `data`.
 *
 * data : host pointer to at least n ints; overwritten with the sorted values.
 * n    : number of elements. Nothing is done when n <= 1 or data is null,
 *        since there is nothing to sort and no valid buffer size to allocate.
 *
 * Any CUDA failure (allocation, copy, launch, execution) prints a message to
 * stderr and exits the process instead of silently returning unsorted data.
 */
void gpu_ssort(int *data, int n){
    // Zero/one element is already sorted; a negative n would wrap the byte count.
    if (data == nullptr || n <= 1) {
        return;
    }

    int* gpuData = nullptr;
    const int left = 0;
    const int right = n - 1;
    const size_t bytes = static_cast<size_t>(n) * sizeof(int);

    // Allocate GPU memory and upload the input.
    ssortCheck(cudaMalloc((void**)&gpuData, bytes), "cudaMalloc");
    ssortCheck(cudaMemcpy(gpuData, data, bytes, cudaMemcpyHostToDevice),
               "host-to-device copy");

    // Launch on device. Launch-configuration errors are only visible through
    // cudaGetLastError(); faults inside the kernel surface at the synchronize.
    selection_sort<<<1, 1>>>(gpuData, left, right);
    ssortCheck(cudaGetLastError(), "selection_sort launch");
    ssortCheck(cudaDeviceSynchronize(), "selection_sort execution");

    // Copy back
    ssortCheck(cudaMemcpy(data, gpuData, bytes, cudaMemcpyDeviceToHost),
               "device-to-host copy");

    ssortCheck(cudaFree(gpuData), "cudaFree");

    // NOTE(review): cudaDeviceReset() is kept from the original; it tears down
    // all device state for this process, so callers must not hold other device
    // allocations across this call.
    ssortCheck(cudaDeviceReset(), "cudaDeviceReset");
}


// Fills arr[0 .. size-1] with pseudo-random values taken from rand() % size,
// i.e. each element ends up in the range [0, size-1].
// The generator is not seeded here; the sequence depends on any earlier srand().
void fillArray(int* arr, int size) {
	int idx = 0;
	while (idx < size) {
		arr[idx] = rand() % size;
		++idx;
	}
}


// Writes the first `size` elements of arr to standard output on one line,
// each followed by a single space, then ends the line (and flushes).
void print(int *arr, int size) {
	const int *cursor = arr;
	for (int remaining = size; remaining > 0; --remaining) {
		std::cout << *cursor << " ";
		++cursor;
	}
	std::cout << std::endl;
}


// Entry point: sorts an array of random ints on the GPU.
//
// Usage: <program> <array_size>
//   array_size : number of elements to generate and sort; must be a positive
//                integer.
// Returns 0 on success, 1 when the argument is missing or not a positive number.
int main(int argc, char *argv[]) {

	// The size argument is mandatory; without this check argv[1] would be
	// read out of range.
	if (argc < 2) {
		std::cerr << "Usage: " << (argc > 0 ? argv[0] : "SSort")
		          << " <array_size>" << std::endl;
		return 1;
	}

	//Get the size of the array
	int n = std::atoi(argv[1]);

	// atoi yields 0 for non-numeric text, and a negative size cannot be
	// allocated, so reject anything that is not strictly positive.
	if (n <= 0) {
		std::cerr << "Array size must be a positive integer, got \""
		          << argv[1] << "\"" << std::endl;
		return 1;
	}

	// Create an array of size n and allocate memory for it
	int *sArray = new int[n];

	//Fill the array with randomly generated numbers
	fillArray(sArray, n);
	//std::cout << "unordered" << std::endl;
	//print(sArray, n);
	std::cout << std::endl;

	//Sort the array on the GPU
	gpu_ssort(sArray, n);
	std::cout << "Selection Sort performed." << std::endl;
	//print(sArray, n);


	//Deallocate the array
	delete[] sArray;

	//std::cout << cudaGetErrorString(cudaGetLastError()) << std::endl;


	return 0;
}
